# hysop.backend.device.codegen.base.kernel_codegen

# Copyright (c) HySoP 2011-2024
#
# This file is part of HySoP software.
# See "https://particle_methods.gricad-pages.univ-grenoble-alpes.fr/hysop-doc/"
# for further info.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


from contextlib import contextmanager
import numpy as np

from hysop.tools.htypes import check_instance
from hysop.constants import Backend
from hysop.backend.device.codegen.base.opencl_codegen import OpenClCodeGenerator
from hysop.backend.device.codegen.base.function_codegen import FunctionBase
from hysop.backend.device.codegen.base.utils import VarDict, ArgDict, WriteOnceDict
from hysop.backend.device.opencl.opencl_types import OpenClTypeGen
from hysop.backend.device.codegen.base.variables import (
    CodegenVariable,
    CodegenVectorClBuiltin,
    CodegenVectorClBuiltinFunc,
)


class KernelBase(FunctionBase):
    def __init__(
        self, kname, vec_type_hint=None, kernel_args=None, known_args=None, **kargs
    ):
        # Note: vec_type_hint is accepted here but not forwarded; it is
        # handled by subclasses (see KernelCodeGenerator below).
        super().__init__(
            fname=kname,
            output="void",
            inline=False,
            fargs=kernel_args,
            known_args=known_args,
            **kargs,
        )


class KernelCodeGenerator(KernelBase, OpenClCodeGenerator):
    def __init__(
        self,
        name,
        typegen,
        work_dim,
        symbolic_mode=True,
        kernel_args=None,
        known_vars=None,
        vec_type_hint=None,
        **kwds,
    ):
        kernel_args = ArgDict() if (kernel_args is None) else kernel_args
        known_vars = WriteOnceDict() if (known_vars is None) else known_vars

        check_instance(typegen, OpenClTypeGen)
        check_instance(kernel_args, ArgDict)
        assert work_dim > 0 and work_dim <= 3, work_dim

        # A vec_type_hint only makes sense for vector types: reject unknown
        # types and drop the hint for scalar (single-component) types.
        if vec_type_hint is not None:
            if vec_type_hint not in typegen.builtin_types:
                msg = f"Invalid vec_type_hint '{vec_type_hint}'."
                raise ValueError(msg)
            if typegen.components(vec_type_hint) == 1:
                vec_type_hint = None

        self.vec_type_hint = vec_type_hint
        self.work_dim = work_dim

        if "work_dim" not in known_vars.keys():
            known_vars["work_dim"] = work_dim

        # Kernel arguments whose value is already known become known args.
        known_args = {}
        for ka in kernel_args.keys():
            if ka in known_vars.keys():
                known_args[ka] = known_vars[ka]

        super().__init__(
            name=name,
            kname=name,
            typegen=typegen,
            symbolic_mode=symbolic_mode,
            kernel_args=kernel_args,
            known_args=known_args,
            known_vars=known_vars,
            **kwds,
        )

        self.inject_vars(kernel_args)
        self.symbolic_mode = symbolic_mode

        self.gen_kernel_variables()
        self.gen_kernel_attributes()
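
    # Example construction (illustrative sketch, not part of the original
    # source; how the OpenClTypeGen instance is obtained is an assumption):
    #
    #   tg = ...  # some OpenClTypeGen instance
    #   kcg = KernelCodeGenerator(name="my_kernel", typegen=tg, work_dim=3,
    #                             vec_type_hint="float4")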

    # Return the global_work_size deduced from the effective work_size and the
    # given local_work_size.
    # /!\ The returned global_work_size is guaranteed to be a multiple of
    #     local_work_size (work_size is rounded up to the next multiple).
    def get_global_work_size(self, work_size, local_work_size):
        work_size = np.asarray(work_size)
        local_work_size = np.asarray(local_work_size)
        return ((work_size + local_work_size - 1) // local_work_size) * local_work_size
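
    # Example (illustrative, not part of the original source): a 1D work size
    # of 100 items with a local size of 64 is rounded up to the next multiple:
    #   ((100 + 64 - 1) // 64) * 64 == 128
    #
    #   >>> kcg.get_global_work_size((100,), (64,))
    #   array([128])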

    # Return the minimal number of ghosts required in each direction.
    def min_ghosts(self):
        ghosts = (0,) * self.work_dim
        return np.asarray(ghosts)

    # Return a tuple of required (static, dynamic) cache bytes per workgroup.
    def required_workgroup_cache_size(self, local_work_size):
        return (0, 0)
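
    # Illustrative override (sketch; the subclass and its caching policy are
    # hypothetical): a kernel caching one float per work item in local memory
    # would report a dynamic cache requirement of
    #   (0, int(np.prod(local_work_size)) * np.dtype(np.float32).itemsize)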

    # Declare code generation variables mirroring the OpenCL work-item
    # builtins (global/local sizes and ids, group ids, ...).
    def gen_kernel_variables(self):
        tg = self.typegen
        work_dim = self.work_dim
        sm = self.symbolic_mode

        kvars = VarDict()
        kvars["work_dim"] = CodegenVariable("work_dim", "uint", tg, symbolic_mode=sm)
        kvars["global_index"] = CodegenVariable("GID", "int", tg)
        kvars["local_index"] = CodegenVariable("LID", "int", tg)
        kvars["global_size"] = CodegenVectorClBuiltinFunc(
            "global_size", "G", "int", work_dim, tg, symbolic_mode=sm
        )
        kvars["local_size"] = CodegenVectorClBuiltinFunc(
            "local_size", "L", "int", work_dim, tg, symbolic_mode=sm
        )
        kvars["global_id"] = CodegenVectorClBuiltinFunc(
            "global_id", "gid", "int", work_dim, tg
        )
        kvars["local_id"] = CodegenVectorClBuiltinFunc(
            "local_id", "lid", "int", work_dim, tg
        )
        kvars["num_groups"] = CodegenVectorClBuiltinFunc(
            "num_groups", "ngroups", "int", work_dim, tg, symbolic_mode=sm
        )
        kvars["group_id"] = CodegenVectorClBuiltinFunc(
            "group_id", "group_id", "int", work_dim, tg
        )

        self.update_vars(kvars)

    # Build OpenCL kernel attributes (currently only reqd_work_group_size,
    # when the local work size is already known).
    def gen_kernel_attributes(self):
        vec_type_hint = self.vec_type_hint
        local_work_size = self.vars["local_size"].value

        kernel_attributes = {}
        if local_work_size is not None:
            # Pad the local work size up to 3 components, as required by the
            # reqd_work_group_size attribute.
            lws = tuple(local_work_size) + (1,) * (3 - len(local_work_size))
            kernel_attributes["reqd_work_group_size"] = (
                "reqd_work_group_size({},{},{})".format(lws[0], lws[1], lws[2])
            )
        # if (vec_type_hint is not None):
        #     kernel_attributes['vec_type_hint'] = 'vec_type_hint({})'.format(vec_type_hint)

        self.kernel_attributes = kernel_attributes
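
    # For local_work_size == (8, 8) the generated kernel prototype would
    # presumably carry (sketch of the emitted OpenCL, not from the original
    # source; standard OpenCL attribute syntax is assumed):
    #   __kernel __attribute__((reqd_work_group_size(8,8,1)))
    #   void my_kernel(...);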

    # Emit an early-return guard so that out-of-bounds work items exit the
    # kernel. 'varname' is either the name of a known codegen variable or a
    # CodegenVariable holding the compute domain size.
    def check_workitem_bounds(self, varname, compact=True):
        gid = self.vars["global_id"]
        if isinstance(varname, str):
            N = self.vars[varname]
        elif isinstance(varname, CodegenVariable):
            N = varname
        else:
            msg = f"varname must be a str or a CodegenVariable, got {type(varname).__name__}."
            raise TypeError(msg)
        conditions = [f"({gid[i]}>={N[i]})" for i in range(self.work_dim)]
        cond = " || ".join(conditions)
        with self._if_(cond, compact=compact):
            self.append("return;")
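
    # For work_dim == 2 and a size variable rendered as 'N', the emitted guard
    # looks roughly like (sketch; the exact component syntax and formatting
    # depend on the code generator state):
    #   if ((gid.x>=N.x) || (gid.y>=N.y)) { return; }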

    @contextmanager
    def _kernel_(self):
        name = self.fname
        output = self.output
        fargs, fargs_impl, cargs = self.args.build_args()

        # Constant arguments (cargs) are not passed as kernel parameters:
        # they are declared at global scope with __constant storage.
        with self._codeblock_("global_scope_constants"):
            for carg in cargs:
                carg.ptr = False
                carg.storage = "__constant"
                carg.declare(self)

        with super()._kernel_(
            name=name,
            args=fargs,
            args_impl=fargs_impl,
            attributes=self.kernel_attributes,
        ) as k:
            yield k
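
    # Typical use in a subclass's code generation routine (illustrative
    # sketch; the method name gencode and the appended body are assumptions):
    #
    #   def gencode(self):
    #       with self._kernel_():
    #           self.check_workitem_bounds("N")
    #           self.append("/* ...kernel body... */")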